######################################################################
### R file to examine in-sample prediction accuracy across weights matrices for  "All Economics Is Local"
###
### Created: 3-4-16
### Modified:
###
### NOTE: produces the in-sample comparisons in Table 3
######################################################################

## load packages and read data ##

library(foreign)
library(MASS)
library(mnormt)
library(nonnest2)
library(stargazer)

## Set up working directory ##
#setwd("")
	
data <- read.dta('All Economics Is Local.dta')

set.seed(0624)

## estimate all the models then calculate predicted values ##

## National
national <- polr(retnat ~ gsp_pc2_ch + gdppc_growth + ch_unem + inflation + approve + age +
	male + nonwhite + union + college + married + unemployed + own_home + ppid + noppid,
	data = data, method = 'logistic', Hess = TRUE, na.action = na.exclude)
summary(national)
natVal <- predict(national, type = 'probs', na.action = na.exclude)
natPreds <- natVal[, 1] + (natVal[, 2] * 2) + (natVal[, 3] * 3)
natDiff <- natPreds - as.numeric(data$retnat)
natDiff <- na.omit(natDiff)
summary(abs(natDiff))

## Contiguity (W2)
cont <- polr(retnat ~ gsp_pc2_ch + sp_W2_gsp_pc2_ch + ch_unem + inflation + approve + age +
	male + nonwhite + union + college + married + unemployed + own_home + ppid + noppid,
	data = data, method = 'logistic', Hess = TRUE, na.action = na.exclude)
summary(cont)
contVal <- predict(cont, type = 'probs', na.action = na.exclude)
contPreds <- contVal[, 1] + (contVal[, 2] * 2) + (contVal[, 3] * 3)
contDiff <- contPreds - as.numeric(data$retnat)
contDiff <- na.omit(contDiff)
summary(abs(contDiff))

## Cosponsorship (W7)
cosponsor <- polr(retnat ~ gsp_pc2_ch + sp_W7_gsp_pc2_ch + ch_unem + inflation + approve + age +
	male + nonwhite + union + college + married + unemployed + own_home + ppid + noppid,
	data = data, method = 'logistic', Hess = TRUE, na.action = na.exclude)
summary(cosponsor)

cosponsorVal <- predict(cosponsor, type = 'probs', na.action = na.exclude)
cosponsorPreds <- cosponsorVal[, 1] + (cosponsorVal[, 2] * 2) + (cosponsorVal[, 3] * 3)
cosponsorDiff <- cosponsorPreds - as.numeric(data$retnat)
cosponsorDiff <- na.omit(cosponsorDiff)
summary(abs(cosponsorDiff))

## Media messages (undirected) (W10U)
media <- polr(retnat ~ gsp_pc2_ch + sp_W10U_gsp_pc2_ch + ch_unem + inflation + approve + age +
	male + nonwhite + union + college + married + unemployed + own_home + ppid + noppid,
	data = data, method = 'logistic', Hess = TRUE, na.action = na.exclude)
summary(media)

mediaVal <- predict(media, type = 'probs', na.action = na.exclude)
mediaPreds <- mediaVal[, 1] + (mediaVal[, 2] * 2) + (mediaVal[, 3] * 3)
mediaDiff <- mediaPreds - as.numeric(data$retnat)
mediaDiff <- na.omit(mediaDiff)
summary(abs(mediaDiff))

## Cross-border employment (W12)
cross <- polr(retnat ~ gsp_pc2_ch + sp_W12_gsp_pc2_ch + ch_unem + inflation + approve + age +
	male + nonwhite + union + college + married + unemployed + own_home + ppid + noppid,
	data = data, method = 'logistic', Hess = TRUE, na.action = na.exclude)
summary(cross)

crossVal <- predict(cross, type = 'probs', na.action = na.exclude)
crossPreds <- crossVal[, 1] + (crossVal[, 2] * 2) + (crossVal[, 3] * 3)
crossDiff <- crossPreds - as.numeric(data$retnat)
crossDiff <- na.omit(crossDiff)
summary(abs(crossDiff))

## Economic similarity (W14)
econ <- polr(retnat ~ gsp_pc2_ch + sp_W14_gsp_pc2_ch + ch_unem + inflation + approve + age +
	male + nonwhite + union + college + married + unemployed + own_home + ppid + noppid,
	data = data, method = 'logistic', Hess = TRUE, na.action = na.exclude)
summary(econ)

econVal <- predict(econ, type = 'probs', na.action = na.exclude)
econPreds <- econVal[, 1] + (econVal[, 2] * 2) + (econVal[, 3] * 3)
econDiff <- econPreds - as.numeric(data$retnat)
econDiff <- na.omit(econDiff)
summary(abs(econDiff))


## this piece of code compares the fit for the models and generates ##
## proportion of better fit and probability of observing that difference shown in Table 3 ##

table(abs(mediaDiff) < abs(econDiff)) / length(mediaDiff)
1 - pbinom(length(mediaDiff[abs(mediaDiff) < abs(econDiff)]), size = length(mediaDiff), prob = 0.5)

table(abs(econDiff) < abs(cosponsorDiff)) / length(econDiff)
1 - pbinom(length(econDiff[abs(econDiff) < abs(cosponsorDiff)]), size = length(econDiff), prob = 0.5)

table(abs(cosponsorDiff) < abs(crossDiff)) / length(cosponsorDiff)
1 - pbinom(length(cosponsorDiff[abs(cosponsorDiff) < abs(crossDiff)]), size = length(cosponsorDiff), prob = 0.5)

table(abs(crossDiff) < abs(contDiff)) / length(crossDiff)
1 - pbinom(length(crossDiff[abs(crossDiff) < abs(contDiff)]), size = length(contDiff), prob = 0.5)

table(abs(contDiff) < abs(natDiff)) / length(contDiff)
1 - pbinom(length(contDiff[abs(contDiff) < abs(natDiff)]), size = length(natDiff), prob = 0.5)
